ggplot2: the easiest path to graphics

# One-time setup: install ggplot2. The quotes must be plain ASCII double
# quotes; typographic quotes are a syntax error in R.
install.packages("ggplot2")

library(ggplot2)
# Attach with library() so that a missing package stops the script right here;
# require() only returns FALSE and lets later calls fail obscurely.
library(data.table)
library(tidyr)
library(GGally)
## Warning: replacing previous import by 'grid::arrow' when loading 'GGally'
## Warning: replacing previous import by 'grid::unit' when loading 'GGally'
library(ggthemes)

http://docs.ggplot2.org/0.9.3/

http://cran.r-project.org/web/packages/ggplot2/ggplot2.pdf

http://www.cookbook-r.com/Graphs/

qplot

# Show the facebook data as a data.table (compact print: first and last rows).
print(data.table(facebook))
##         userid age dob_day dob_year dob_month gender tenure friend_count
##     1: 2094382  14      19     1999        11   male    266            0
##     2: 1192601  14       2     1999        11 female      6            0
##     3: 2083884  14      16     1999        11   male     13            0
##     4: 1203168  14      25     1999        12 female     93            0
##     5: 1733186  14       4     1999        12   male     82            0
##    ---                                                                  
## 98999: 1268299  68       4     1945         4 female    541         2118
## 99000: 1256153  18      12     1995         3 female     21         1968
## 99001: 1195943  15      10     1998         5 female    111         2002
## 99002: 1468023  23      11     1990         4 female    416         2560
## 99003: 1397896  39      15     1974         5 female    397         2049
##        friendships_initiated likes likes_received mobile_likes
##     1:                     0     0              0            0
##     2:                     0     0              0            0
##     3:                     0     0              0            0
##     4:                     0     0              0            0
##     5:                     0     0              0            0
##    ---                                                        
## 98999:                   341  3996          18089         3505
## 99000:                  1720  4401          13412         4399
## 99001:                  1524 11959          12554        11959
## 99002:                   185  4506           6516         4506
## 99003:                   768  9410          12443         9410
##        mobile_likes_received www_likes www_likes_received
##     1:                     0         0                  0
##     2:                     0         0                  0
##     3:                     0         0                  0
##     4:                     0         0                  0
##     5:                     0         0                  0
##    ---                                                   
## 98999:                 11887       491               6202
## 99000:                 10592         2               2820
## 99001:                 11462         0               1092
## 99002:                  5760         0                756
## 99003:                  9530         0               2913
# Histogram of the birth day with default binning.
qplot(x = dob_day, data = facebook)
# Same histogram with explicit breaks at 1, 2, ..., 31.
qplot(x = dob_day, data = facebook, breaks = seq(1, 31))
# Treat the day as a factor instead of a number.
qplot(x = as.factor(dob_day), data = facebook)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# friend_count by gender: default geom first, then as a boxplot.
qplot(gender, friend_count, data = facebook)
qplot(gender, friend_count, data = facebook, geom = "boxplot")
# Boxplot again with the y axis limited to 0..1000.
qplot(gender, friend_count, data = facebook, geom = "boxplot", ylim = c(0, 1000))

# likes against friend_count.
qplot(x = friend_count, y = likes, data = facebook)
# I() passes alpha as a literal value rather than mapping it to a scale.
qplot(x = friend_count, y = likes, data = facebook, alpha = I(0.1))
# Additionally colour the points by gender.
qplot(x = friend_count, y = likes, data = facebook, alpha = I(0.1), color = gender)

Themes

# theme_economist(), scale_colour_economist() and theme_tufte() come from ggthemes.
library(ggthemes)
# Reference histogram of the birth day with one break per day of the month.
base <- qplot(x = dob_day, data = facebook, breaks = seq(1, 31))
# Try two alternative looks without modifying `base`.
base + theme_economist() + scale_colour_economist()
base + theme_tufte()
# Store the minimal theme on the plot, then inspect its labels.
base <- base + theme_minimal()
base[["labels"]]
# Set theme_grey() as the current default theme.
theme_set(theme_grey())

geom_histogram

# Plot object holding only the data and the x mapping; no geom is added here.
base <- ggplot(data = facebook, mapping = aes(x = dob_day))
print(base)

# The plot object keeps the data frame it was created with.
head(base[["data"]])
##    userid age dob_day dob_year dob_month gender tenure friend_count
## 1 2094382  14      19     1999        11   male    266            0
## 2 1192601  14       2     1999        11 female      6            0
## 3 2083884  14      16     1999        11   male     13            0
## 4 1203168  14      25     1999        12 female     93            0
## 5 1733186  14       4     1999        12   male     82            0
## 6 1524765  14       1     1999        12   male     15            0
##   friendships_initiated likes likes_received mobile_likes
## 1                     0     0              0            0
## 2                     0     0              0            0
## 3                     0     0              0            0
## 4                     0     0              0            0
## 5                     0     0              0            0
## 6                     0     0              0            0
##   mobile_likes_received www_likes www_likes_received
## 1                     0         0                  0
## 2                     0         0                  0
## 3                     0         0                  0
## 4                     0         0                  0
## 5                     0         0                  0
## 6                     0         0                  0
# Labels derived from the aesthetic mapping.
base[["labels"]]
## $x
## [1] "dob_day"
# Layers attached to the plot (none so far).
base[["layers"]]
## list()
# Histogram of the birth day, built in a single expression.
base_histograma <- ggplot(aes(x = dob_day), data = facebook) +
  geom_histogram()
# The same histogram, built by adding a layer to the stored `base` plot.
base_mes_histograma <- base + geom_histogram()

# dob_day is numeric, so custom breaks belong on a continuous x scale; a
# discrete scale does not match binned numeric data.
base_histograma_axes <- ggplot(aes(x = dob_day), data = facebook) +
  geom_histogram() +
  scale_x_continuous(breaks = seq(1, 31, 4))

aes

Argument Continuous Discrete
x x x
y x x
colour x x
fill x x
size x x
shape x
linetype x
group x
# Birth-day histogram grouped by month; the black outline is a fixed setting.
ggplot(facebook) +
  geom_histogram(
    mapping = aes(x = dob_day, group = as.factor(dob_month)),
    color = "black"
  )

# Month mapped to the fill colour; outline colour and line type stay fixed.
ggplot(facebook) +
  geom_histogram(
    mapping = aes(x = dob_day, fill = as.factor(dob_month)),
    color = "black",
    linetype = "dashed"
  )

# Month mapped to the outline colour instead.
ggplot(facebook) +
  geom_histogram(mapping = aes(x = dob_day, color = as.factor(dob_month)))

# Plot with no plot-level data: the layer brings its own subset (users born on
# 1 January with a known birth year), filled by age.
p <- ggplot() +
  geom_histogram(
    data = facebook[with(facebook, dob_day == 1 & dob_month == 1 & !is.na(dob_year)), ],
    mapping = aes(x = dob_year, fill = as.factor(age)),
    color = "black",
    binwidth = 1
  )

# The plot object itself holds no data ...
head(p[["data"]])
## list()
# ... everything lives in the layer.
p[["layers"]]
## [[1]]
## mapping: x = dob_year, fill = as.factor(age) 
## geom_bar: na.rm = FALSE
## stat_bin: binwidth = 1, bins = NULL, origin = NULL, right = FALSE, na.rm = FALSE
## position_stack
# First rows of the data stored inside the first (and only) layer.
head(p[["layers"]][[1]][["data"]])
##      userid age dob_day dob_year dob_month gender tenure friend_count
## 9   1365174  13       1     2000         1   male     81            0
## 34  1871735  14       1     1999         1 female    578            0
## 35  1459785  14       1     1999         1   male    478            0
## 36  1215208  14       1     1999         1   male    170            0
## 123 1505398  16       1     1997         1   male    108            0
## 124 2071007  16       1     1997         1   male     29            0
##     friendships_initiated likes likes_received mobile_likes
## 9                       0     0              0            0
## 34                      0     0              0            0
## 35                      0     0              0            0
## 36                      0     0              0            0
## 123                     0     0              0            0
## 124                     0     0              0            0
##     mobile_likes_received www_likes www_likes_received
## 9                       0         0                  0
## 34                      0         0                  0
## 35                      0         0                  0
## 36                      0         0                  0
## 123                     0         0                  0
## 124                     0         0                  0
# Render the plot.
print(p)

Colors

# Polar bar charts with 2, 3, 5 and 20 categories, to show the default fill
# colours chosen for each number of levels.
invisible(lapply(c(2, 3, 5, 20), function(n_levels) {
  wheel_data <- data.frame(dimensions = letters[seq_len(n_levels)])
  print(
    ggplot(data = wheel_data, aes(x = dimensions, fill = dimensions)) +
      geom_bar() +
      coord_polar()
  )
}))

position

# Base plot: tenure divided by 365, filled by gender.
fc.ten <- ggplot(data = facebook, mapping = aes(x = tenure / 365, fill = gender))

# Same histogram under different position adjustments.
fc.ten + geom_histogram(binwidth = 0.25, position = "dodge")
fc.ten + geom_histogram(binwidth = 0.25, position = "identity")
fc.ten + geom_histogram(binwidth = 0.25, position = "identity", alpha = 0.5)
fc.ten + geom_histogram(binwidth = 0.25, position = "fill", alpha = 0.5)

# Keep the "fill" variant in a variable so its internals can be inspected.
ggplot.obj <- fc.ten + geom_histogram(binwidth = 0.25, position = "fill", alpha = 0.5)
# Layers added on top of the base plot.
ggplot.obj[["layers"]]
## [[1]]
## geom_bar: na.rm = FALSE
## stat_bin: binwidth = 0.25, bins = NULL, origin = NULL, right = FALSE, na.rm = FALSE
## position_fill
# Plot-level aesthetic mappings.
ggplot.obj[["mapping"]]
## * x    -> tenure/365
## * fill -> gender
# Axis and legend labels.
ggplot.obj[["labels"]]
## $x
## [1] "tenure/365"
## 
## $fill
## [1] "gender"
## 
## $y
## [1] "count"

geom_density

# Base plot: tenure divided by 365, filled by gender.
fc.ten <- ggplot(aes(x = tenure / 365, fill = gender), data = facebook)
# Histogram on the count scale (implicit, then explicit) and on the density scale.
fc.ten + geom_histogram(binwidth = 0.25, position = "dodge")
fc.ten + geom_histogram(aes(y = ..count..), binwidth = 0.25, position = "dodge")
fc.ten + geom_histogram(aes(y = ..density..), binwidth = 0.25, position = "dodge")

# Density curves: overlaid, stacked, and overlaid on the density-scaled histogram.
fc.ten + geom_density(alpha = 0.5)
fc.ten + geom_density(alpha = 0.5, position = "stack")
fc.ten + geom_density(alpha = 0.5) +
  geom_histogram(aes(y = ..density..), binwidth = 0.25, position = "dodge")

# Age density for everybody.
ggplot(aes(x = age), data = facebook) + geom_density(alpha = 0.5)

# Split by gender (users with missing gender form their own group).
ggplot(aes(x = age, fill = gender), data = facebook) + geom_density(alpha = 0.5)

# Split by gender, dropping users whose gender is missing.
ggplot(aes(x = age, fill = gender), data = facebook[!is.na(facebook$gender), ]) +
  geom_density(alpha = 0.5)

# Females only. %in% never yields NA, so users with a missing gender are
# excluded; `== "female"` would return NA for them and the data frame subset
# would then contain all-NA rows.
ggplot(aes(x = age, fill = gender), data = facebook[facebook$gender %in% "female", ]) +
  geom_density(alpha = 0.5)

Axis Transformations

# friend_count on the raw scale, limited to 0..1000 with a break every 100.
ggplot(data = facebook, mapping = aes(x = friend_count)) +
  geom_histogram(binwidth = 25) +
  scale_x_continuous(limits = c(0, 1000), breaks = seq(0, 1000, 100))

# Square root applied to the variable before plotting ...
ggplot(data = facebook, mapping = aes(x = sqrt(friend_count))) +
  geom_histogram(binwidth = 1)

# ... versus a square-root x scale on the untransformed variable.
ggplot(data = facebook, mapping = aes(x = friend_count)) +
  geom_histogram(binwidth = 1) +
  scale_x_sqrt()

# The same comparison with log10: transform the variable ...
ggplot(data = facebook, mapping = aes(x = log10(friend_count))) +
  geom_histogram()

# ... or transform the scale and choose the breaks in original units.
ggplot(data = facebook, mapping = aes(x = friend_count)) +
  geom_histogram() +
  scale_x_log10(breaks = c(1, 10, 100, 1000))

# First six rows of the diamonds data set.
head(diamonds, n = 6L)
##   carat       cut color clarity depth table price    x    y    z
## 1  0.23     Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
## 2  0.21   Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
## 3  0.23      Good     E     VS1  56.9    65   327 4.05 4.07 2.31
## 4  0.29   Premium     I     VS2  62.4    58   334 4.20 4.23 2.63
## 5  0.31      Good     J     SI2  63.3    58   335 4.34 4.35 2.75
## 6  0.24 Very Good     J    VVS2  62.8    57   336 3.94 3.96 2.48
# price against carat, semi-transparent points.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(alpha = 0.1)

# Points coloured by the diamond's colour grade.
ggplot(diamonds, aes(x = carat, y = price, color = color)) +
  geom_point(alpha = 0.1)

# Same plot with both axes on a log10 scale.
ggplot(diamonds, aes(x = carat, y = price, color = color)) +
  geom_point(alpha = 0.1) +
  scale_y_log10() +
  scale_x_log10()

Labels and Axes

# Frequency polygon of friend_count per gender (users without gender removed).
ggplot(data = subset(facebook, !is.na(gender)), mapping = aes(x = friend_count, y = ..count..)) +
  geom_freqpoly(aes(color = gender), binwidth = 25)

# Same plot with a limited x axis and explicit axis titles.
ggplot(data = subset(facebook, !is.na(gender)), mapping = aes(x = friend_count, y = ..count..)) +
  geom_freqpoly(aes(color = gender), binwidth = 25) +
  scale_x_continuous(limits = c(0, 1000), breaks = seq(0, 1000, 100)) +
  xlab("Friend Count") +
  ylab("users with that friend count")

# y as a share of the counts, with hand-written percentage labels at 0, 0.05 and 0.10.
ggplot(
  data = subset(facebook, !is.na(gender)),
  mapping = aes(x = friend_count, y = ..count.. / sum(..count..))
) +
  geom_freqpoly(aes(color = gender), binwidth = 25) +
  scale_x_continuous(limits = c(0, 1000), breaks = seq(0, 1000, 100)) +
  scale_y_continuous(labels = c("0%", "5%", "10%"), breaks = c(0, 0.05, 0.10)) +
  xlab("Friend Count") +
  ylab("Percentage of users with that friend count")

geom_boxplot

# Boxplot of age by gender, excluding users whose gender is missing.
fcg <- ggplot(aes(x = gender, y = age), data = subset(facebook, !is.na(gender))) +
  geom_boxplot()

fcg
# Overlay the raw observations, jittered and highly transparent.
# (The stray empty first argument of the earlier call has been removed.)
fcg + geom_jitter(alpha = 1 / 20)

# Three ways of showing only ages 0..60. Per the ggplot2 documentation, ylim()
# and scale limits discard out-of-range data before the statistics are
# computed, whereas coord_cartesian() only zooms the view.
fcg + ylim(0, 60)
fcg + scale_y_continuous(limits = c(0, 60))
fcg + coord_cartesian(ylim = c(0, 60))

Multi Layer

# First six rows of the mpg data set.
head(mpg, n = 6L)
##   manufacturer model displ year cyl      trans drv cty hwy fl   class
## 1         audi    a4   1.8 1999   4   auto(l5)   f  18  29  p compact
## 2         audi    a4   1.8 1999   4 manual(m5)   f  21  29  p compact
## 3         audi    a4   2.0 2008   4 manual(m6)   f  20  31  p compact
## 4         audi    a4   2.0 2008   4   auto(av)   f  21  30  p compact
## 5         audi    a4   2.8 1999   6   auto(l5)   f  16  26  p compact
## 6         audi    a4   2.8 1999   6 manual(m5)   f  18  26  p compact
# City mileage by number of cylinders.
ggplot(data = mpg, aes(x = as.factor(cyl), y = cty)) +
  geom_boxplot(color = "blue")

# Highway mileage by number of cylinders.
ggplot(data = mpg, aes(x = as.factor(cyl), y = hwy)) +
  geom_boxplot(color = "brown")

# Both in one plot: x is shared at plot level, each layer supplies its own y.
ggplot(data = mpg, aes(x = as.factor(cyl))) +
  geom_boxplot(aes(y = cty), color = "blue") +
  geom_boxplot(aes(y = hwy), color = "brown")

# Violins for both variables plus the per-car average of cty and hwy as points.
# position_jitter() arguments are spelled out (height/width) instead of relying
# on partial matching of `h` and `w`; points are jittered horizontally only.
ggplot(data = mpg, aes(x = as.factor(cyl))) +
  geom_violin(aes(y = cty), color = "blue", alpha = 0.5) +
  geom_violin(aes(y = hwy), color = "brown", alpha = 0.5) +
  geom_point(
    aes(y = (cty + hwy) / 2),
    color = "black",
    alpha = 0.5,
    position = position_jitter(height = 0, width = 0.2)
  )

geom_point

# Base plot: friend_count against age.
fcp <- ggplot(aes(x = age, y = friend_count), data = facebook)
fcp + geom_point()

# Restrict the x axis to the range from the youngest age up to 100.
fcp <- fcp + xlim(min(facebook$age), 100)
fcp + geom_point()

# Transparency against overplotting.
fcp + geom_point(alpha = 1 / 20)

# Jitter in both directions.
fcp + geom_jitter(alpha = 1 / 20)

# Square-root transformation of the y coordinate.
fcp + geom_point(alpha = 1 / 20) + coord_trans(y = "sqrt")

# Default jitter combined with the sqrt coordinate fails with the error below.
# NOTE(review): presumably the vertical jitter pushes some y values below zero,
# where sqrt is undefined - the next call avoids it with height = 0.
fcp + geom_point(alpha = 1 / 20, position = position_jitter()) + coord_trans(y = "sqrt")
## Error in if (zero_range(range)) {: valor ausente donde TRUE/FALSE es necesario
# Horizontal jitter only; `height` is spelled out instead of the partial match `h`.
fcp + geom_point(alpha = 1 / 20, position = position_jitter(height = 0)) + coord_trans(y = "sqrt")

# Same points on a log10 y scale.
fcp + geom_point(alpha = 1 / 20, position = position_jitter(height = 0)) + scale_y_log10()

stat="summary"

# From here on facebook is a data.table.
facebook <- as.data.table(facebook)
# Mean friend_count for every (age, gender) combination.
gruped.friend_count <- facebook[
  ,
  list(friend_count_mean = mean(friend_count)),
  by = list(age, gender)
]
print(gruped.friend_count)
##      age gender friend_count_mean
##   1:  14   male          164.1456
##   2:  14 female          362.4286
##   3:  13   male          102.1340
##   4:  13 female          259.1606
##   5:  15   male          200.6658
##  ---                             
## 270:  33     NA          493.0000
## 271:  36     NA          515.0000
## 272:  95     NA          830.0000
## 273:  68     NA          790.0000
## 274:  50     NA          863.0000
# Long to wide: one row per age, one column per gender value.
# (tidyr now recommends pivot_wider() for this; spread() is kept as in the tutorial.)
gruped.friend_count <- spread(gruped.friend_count, key = gender, value = friend_count_mean)
print(gruped.friend_count)
##      age   female     male       NA
##   1:  13 259.1606 102.1340       NA
##   2:  14 362.4286 164.1456       NA
##   3:  15 538.6813 200.6658 116.0000
##   4:  16 519.5145 239.6748       NA
##   5:  17 538.9943 236.4924 106.5000
##  ---                               
##  97: 109 207.5000 163.0000       NA
##  98: 110 239.1429 410.5714 503.0000
##  99: 111 244.4286 246.5000 148.0000
## 100: 112 201.2000 594.0769       NA
## 101: 113 278.6571 410.3956 166.3333
# Wide back to long: every column except age becomes a (gender, friend_count_mean) pair.
gather(gruped.friend_count, key = gender, value = friend_count_mean, -age)
##     age gender friend_count_mean
## 1    13 female         259.16062
## 2    14 female         362.42857
## 3    15 female         538.68130
## 4    16 female         519.51454
## 5    17 female         538.99434
## 6    18 female         481.97938
## 7    19 female         470.80075
## 8    20 female         380.82445
## 9    21 female         307.10637
## 10   22 female         251.91519
## 11   23 female         239.72114
## 12   24 female         237.07385
## 13   25 female         153.88349
## 14   26 female         175.13443
## 15   27 female         158.72441
## 16   28 female         154.22850
## 17   29 female         145.78139
## 18   30 female         130.57167
## 19   31 female         144.18196
## 20   32 female         125.27099
## 21   33 female         125.12393
## 22   34 female         133.09534
## 23   35 female         123.18026
## 24   36 female         121.36768
## 25   37 female         131.34301
## 26   38 female         129.90657
## 27   39 female         111.79019
## 28   40 female         113.72446
## 29   41 female         112.27832
## 30   42 female         114.06728
## 31   43 female         113.79487
## 32   44 female         126.23184
## 33   45 female         134.87387
## 34   46 female         133.71543
## 35   47 female         127.39906
## 36   48 female         116.54730
## 37   49 female         124.84435
## 38   50 female         133.03261
## 39   51 female         119.39596
## 40   52 female         130.06362
## 41   53 female         119.90717
## 42   54 female         106.35630
## 43   55 female         109.34639
## 44   56 female         116.01681
## 45   57 female         118.64242
## 46   58 female         121.38144
## 47   59 female         110.94013
## 48   60 female         124.70660
## 49   61 female         111.81140
## 50   62 female         115.11463
## 51   63 female         125.80321
## 52   64 female         103.54570
## 53   65 female          93.96193
## 54   66 female         106.15206
## 55   67 female          86.98773
## 56   68 female         326.10682
## 57   69 female          93.40078
## 58   70 female          73.97285
## 59   71 female         123.43284
## 60   72 female          84.53529
## 61   73 female         131.23392
## 62   74 female         133.48611
## 63   75 female          93.02542
## 64   76 female          70.32258
## 65   77 female          92.69000
## 66   78 female         197.39773
## 67   79 female          95.12500
## 68   80 female         195.41667
## 69   81 female         141.91525
## 70   82 female          95.44444
## 71   83 female         256.52381
## 72   84 female         363.17391
## 73   85 female         313.21951
## 74   86 female         336.71053
## 75   87 female         139.76471
## 76   88 female         246.11538
## 77   89 female         291.95652
## 78   90 female         241.00000
## 79   91 female         368.00000
## 80   92 female         708.76471
## 81   93 female         254.37500
## 82   94 female         493.15152
## 83   95 female         591.35714
## 84   96 female         544.82353
## 85   97 female         344.04762
## 86   98 female         482.35556
## 87   99 female         557.56000
## 88  100 female         338.69853
## 89  101 female         428.98413
## 90  102 female         471.61111
## 91  103 female         520.07407
## 92  104 female         443.02564
## 93  105 female         400.31034
## 94  106 female         370.20513
## 95  107 female         315.34211
## 96  108 female         371.86699
## 97  109 female         207.50000
## 98  110 female         239.14286
## 99  111 female         244.42857
## 100 112 female         201.20000
## 101 113 female         278.65714
## 102  13   male         102.13402
## 103  14   male         164.14564
## 104  15   male         200.66576
## 105  16   male         239.67478
## 106  17   male         236.49242
## 107  18   male         233.91833
## 108  19   male         254.58405
## 109  20   male         225.59212
## 110  21   male         195.32077
## 111  22   male         187.25316
## 112  23   male         184.92139
## 113  24   male         158.67225
## 114  25   male         120.07286
## 115  26   male         129.05213
## 116  27   male         121.55383
## 117  28   male         111.93434
## 118  29   male         107.44754
## 119  30   male         106.94803
## 120  31   male         103.24788
## 121  32   male         108.01306
## 122  33   male          92.04784
## 123  34   male          94.29045
## 124  35   male          83.65303
## 125  36   male          93.26812
## 126  37   male          82.09492
## 127  38   male          98.04558
## 128  39   male          93.29346
## 129  40   male          71.31250
## 130  41   male          87.22967
## 131  42   male          81.41617
## 132  43   male          91.43375
## 133  44   male          77.50096
## 134  45   male          82.94523
## 135  46   male          87.91368
## 136  47   male          88.93038
## 137  48   male         102.76275
## 138  49   male          85.27027
## 139  50   male         116.72871
## 140  51   male          83.02521
## 141  52   male          86.11043
## 142  53   male         104.12326
## 143  54   male          99.91136
## 144  55   male         116.86175
## 145  56   male          94.64232
## 146  57   male         100.64368
## 147  58   male         108.73333
## 148  59   male          92.33770
## 149  60   male          98.15015
## 150  61   male         125.98933
## 151  62   male         107.66975
## 152  63   male         128.35556
## 153  64   male         120.81928
## 154  65   male         101.97643
## 155  66   male         118.62058
## 156  67   male          90.65217
## 157  68   male         313.84055
## 158  69   male          97.50495
## 159  70   male         102.04255
## 160  71   male         105.18543
## 161  72   male          83.58871
## 162  73   male         101.77215
## 163  74   male         150.52985
## 164  75   male         115.58036
## 165  76   male         110.27381
## 166  77   male         236.74627
## 167  78   male          92.89041
## 168  79   male          80.13158
## 169  80   male         368.34375
## 170  81   male         122.69388
## 171  82   male         280.73333
## 172  83   male         255.28409
## 173  84   male         301.30769
## 174  85   male         297.51220
## 175  86   male         396.78378
## 176  87   male         473.70833
## 177  88   male         500.57143
## 178  89   male         497.31429
## 179  90   male         291.43902
## 180  91   male         350.42222
## 181  92   male         277.57576
## 182  93   male         274.80189
## 183  94   male         413.21739
## 184  95   male         300.79167
## 185  96   male         411.11429
## 186  97   male         490.02857
## 187  98   male         369.25000
## 188  99   male         343.12500
## 189 100   male         373.78689
## 190 101   male         373.67391
## 191 102   male         415.15044
## 192 103   male         427.85895
## 193 104   male         414.00000
## 194 105   male         415.44681
## 195 106   male         288.30588
## 196 107   male         393.35000
## 197 108   male         369.43898
## 198 109   male         163.00000
## 199 110   male         410.57143
## 200 111   male         246.50000
## 201 112   male         594.07692
## 202 113   male         410.39560
## 203  13   <NA>                NA
## 204  14   <NA>                NA
## 205  15   <NA>         116.00000
## 206  16   <NA>                NA
## 207  17   <NA>         106.50000
## 208  18   <NA>                NA
## 209  19   <NA>         374.00000
## 210  20   <NA>          65.00000
## 211  21   <NA>          79.00000
## 212  22   <NA>                NA
## 213  23   <NA>         246.50000
## 214  24   <NA>                NA
## 215  25   <NA>          25.40000
## 216  26   <NA>          65.00000
## 217  27   <NA>           7.00000
## 218  28   <NA>          27.00000
## 219  29   <NA>          41.00000
## 220  30   <NA>                NA
## 221  31   <NA>           2.00000
## 222  32   <NA>                NA
## 223  33   <NA>         493.00000
## 224  34   <NA>                NA
## 225  35   <NA>                NA
## 226  36   <NA>         515.00000
## 227  37   <NA>                NA
## 228  38   <NA>         244.00000
## 229  39   <NA>                NA
## 230  40   <NA>                NA
## 231  41   <NA>                NA
## 232  42   <NA>          19.00000
## 233  43   <NA>                NA
## 234  44   <NA>                NA
## 235  45   <NA>                NA
## 236  46   <NA>                NA
## 237  47   <NA>          37.00000
## 238  48   <NA>          43.00000
## 239  49   <NA>          73.00000
## 240  50   <NA>         863.00000
## 241  51   <NA>                NA
## 242  52   <NA>          57.66667
## 243  53   <NA>         124.50000
## 244  54   <NA>         178.66667
## 245  55   <NA>         124.16667
## 246  56   <NA>         155.00000
## 247  57   <NA>          70.50000
## 248  58   <NA>          39.33333
## 249  59   <NA>           4.00000
## 250  60   <NA>         128.83333
## 251  61   <NA>          43.00000
## 252  62   <NA>          26.00000
## 253  63   <NA>          74.50000
## 254  64   <NA>          80.80000
## 255  65   <NA>          95.50000
## 256  66   <NA>          69.75000
## 257  67   <NA>          10.50000
## 258  68   <NA>         790.00000
## 259  69   <NA>          39.00000
## 260  70   <NA>         160.33333
## 261  71   <NA>                NA
## 262  72   <NA>          67.50000
## 263  73   <NA>         470.00000
## 264  74   <NA>         557.66667
## 265  75   <NA>          60.33333
## 266  76   <NA>          17.00000
## 267  77   <NA>          23.50000
## 268  78   <NA>          30.00000
## 269  79   <NA>          69.00000
## 270  80   <NA>                NA
## 271  81   <NA>                NA
## 272  82   <NA>          98.66667
## 273  83   <NA>          21.00000
## 274  84   <NA>         254.00000
## 275  85   <NA>           9.00000
## 276  86   <NA>          64.00000
## 277  87   <NA>          52.00000
## 278  88   <NA>                NA
## 279  89   <NA>         221.00000
## 280  90   <NA>         213.66667
## 281  91   <NA>          49.50000
## 282  92   <NA>         109.00000
## 283  93   <NA>          33.00000
## 284  94   <NA>         279.00000
## 285  95   <NA>         830.00000
## 286  96   <NA>         437.00000
## 287  97   <NA>                NA
## 288  98   <NA>                NA
## 289  99   <NA>         138.50000
## 290 100   <NA>         217.16667
## 291 101   <NA>         199.50000
## 292 102   <NA>        1346.50000
## 293 103   <NA>         280.87500
## 294 104   <NA>                NA
## 295 105   <NA>         187.50000
## 296 106   <NA>          68.00000
## 297 107   <NA>                NA
## 298 108   <NA>         281.76190
## 299 109   <NA>                NA
## 300 110   <NA>         503.00000
## 301 111   <NA>         148.00000
## 302 112   <NA>                NA
## 303 113   <NA>         166.33333
# Ratio of female to male mean friend count by age; the faint dashed line at 1
# marks where both means are equal.
ggplot(data = gruped.friend_count, mapping = aes(x = age, y = female / male)) +
  geom_line() +
  geom_hline(yintercept = 1, alpha = 0.3, linetype = 2)

# Year the account was created, counting back from 2014.
# NOTE(review): assumes tenure is measured in days - confirm.
facebook$Year_joined <- floor(2014 - facebook$tenure / 365)
# Group the joining year into four intervals.
facebook$Year_joined_buket <- cut(
  facebook$Year_joined,
  breaks = c(2004, 2009, 2011, 2012, 2014)
)
# Mean and total friend_count per (bucket, age), ignoring users without gender.
facebook.joined <- facebook[
  !is.na(gender),
  list(
    friend_count.mean = mean(friend_count),
    friend_count.sum = sum(friend_count)
  ),
  by = list(Year_joined_buket, age)
]
print(facebook.joined)
##      Year_joined_buket age friend_count.mean friend_count.sum
##   1:       (2012,2014]  14          208.7742           302305
##   2:       (2012,2014]  13          135.6682            57659
##   3:       (2012,2014]  15          212.3026           326946
##   4:       (2011,2012]  14          350.8129           157515
##   5:       (2012,2014]  16          211.3181           356705
##  ---                                                         
## 390:       (2011,2012] 105          315.4000             1577
## 391:                NA 103          137.0000              137
## 392:       (2004,2009]  16          963.1429             6742
## 393:       (2009,2011] 111          457.3333             1372
## 394:       (2004,2009]  15         1871.0000             1871
# Pre-aggregated totals plotted directly, as points and as lines.
ggplot(facebook.joined, aes(x = age, y = friend_count.sum, color = Year_joined_buket)) +
  geom_point()
ggplot(facebook.joined, aes(x = age, y = friend_count.sum, color = Year_joined_buket)) +
  geom_line()

# The same idea without pre-aggregating: start from the raw rows ...
fc.NoNA <- ggplot(
  data = subset(facebook, !is.na(gender)),
  mapping = aes(x = age, y = friend_count)
)
fc.NoNA + geom_point(aes(color = Year_joined_buket))

# ... and let stat = "summary" compute the sum inside the layer.
fc.NoNA + geom_point(aes(color = Year_joined_buket), stat = "summary", fun.y = sum)

fc.NoNA + geom_line(aes(color = Year_joined_buket), stat = "summary", fun.y = sum)

geom_line

# Mean friend_count per age, one line per joining-year bucket.
fc.NoNA + geom_line(aes(color = Year_joined_buket), stat = "summary", fun.y = mean)
# Same, plus the overall mean as a dashed line.
fc.NoNA +
  geom_line(aes(color = Year_joined_buket), stat = "summary", fun.y = mean) +
  geom_line(stat = "summary", fun.y = mean, linetype = 2)

# Raw points (horizontal jitter only) with the mean and the 10%, 50% and 90%
# quantiles on top, on a sqrt-transformed y coordinate. `height` is spelled out
# instead of relying on partial matching of `h`.
fcp +
  geom_point(
    alpha = 1 / 10,
    position = position_jitter(height = 0),
    color = "darkgreen"
  ) +
  geom_line(stat = "summary", fun.y = mean) +
  geom_line(stat = "summary", fun.y = quantile, fun.args = list(probs = .1), linetype = 2, color = "blue") +
  geom_line(stat = "summary", fun.y = quantile, fun.args = list(probs = .5), linetype = 1, color = "blue") +
  geom_line(stat = "summary", fun.y = quantile, fun.args = list(probs = .9), linetype = 2, color = "blue") +
  coord_trans(y = "sqrt")

# Mean friendships initiated by tenure, per joining-year bucket.
ggplot(aes(x = tenure, y = friendships_initiated), data = subset(facebook, tenure > 0)) +
  geom_line(aes(color = Year_joined_buket), stat = "summary", fun.y = mean)

# A layer can be stored in a variable and reused across plots.
line.layer <- geom_line(aes(color = Year_joined_buket), stat = "summary", fun.y = mean)

# Friendships initiated per unit of tenure, with tenure on x rounded to
# multiples of 1, 7 and 90 so the summary covers progressively wider groups.
ggplot(aes(x = tenure, y = friendships_initiated / tenure), data = subset(facebook, tenure > 0)) +
  line.layer

ggplot(aes(x = 7 * round(tenure / 7), y = friendships_initiated / tenure), data = subset(facebook, tenure > 0)) +
  line.layer

ggplot(aes(x = 90 * round(tenure / 90), y = friendships_initiated / tenure), data = subset(facebook, tenure > 0)) +
  line.layer

geom_smooth

# Base plot: friendships initiated per unit of tenure, users with tenure > 0.
fsmoth <- ggplot(
  data = subset(facebook, tenure > 0),
  mapping = aes(x = tenure, y = friendships_initiated / tenure)
)

# One coloured smoother per joining-year bucket.
fsmoth + geom_smooth(aes(color = Year_joined_buket, fill = Year_joined_buket))
# One smoother per bucket, distinguished by grouping only.
fsmoth + geom_smooth(aes(group = Year_joined_buket))
# A single smoother over all users.
fsmoth + geom_smooth()

geom_rug

 # Smoothed curves per joining-year bucket, used as the base for the rug demos.
rug.plot <- ggplot(
  data = subset(facebook, tenure > 0),
  mapping = aes(x = tenure, y = friendships_initiated / tenure)
) +
  geom_smooth(aes(color = Year_joined_buket, fill = Year_joined_buket))

# Rug drawn from every row of the plot data.
rug.plot + geom_rug()

# Rug drawn from a fresh random sample of 1000 users with tenure > 0.
rug.plot +
  geom_rug(
    data = subset(facebook, tenure > 0)[sample.int(nrow(subset(facebook, tenure > 0)), 1000), ],
    alpha = 0.1
  )

# Same (with another fresh sample), zoomed to y between -0.5 and 5.
rug.plot +
  geom_rug(
    data = subset(facebook, tenure > 0)[sample.int(nrow(subset(facebook, tenure > 0)), 1000), ],
    alpha = 0.1
  ) +
  coord_cartesian(ylim = c(-0.5, 5))

# Keep one sample of 1000 users for the density examples.
sample.facebook <- subset(facebook, tenure > 0)[sample.int(nrow(subset(facebook, tenure > 0)), 1000), ]
fc.ten.sample <- ggplot(data = sample.facebook, mapping = aes(x = tenure / 365, fill = gender))

# Density alone, with a plain rug, and with the rug coloured by gender.
fc.ten.sample + geom_density(alpha = 0.5)
fc.ten.sample + geom_density(alpha = 0.5) + geom_rug(alpha = 0.1)
fc.ten.sample + geom_density(alpha = 0.5) + geom_rug(aes(color = gender), alpha = 0.1)

Faceting

# One panel per gender.
fc.ten.sample +
  geom_density(alpha = 0.5) +
  geom_rug(aes(color = gender), alpha = 0.1) +
  facet_wrap(~gender)

# Birth-day histogram with a break every four days.
f.dob_day <- ggplot(data = facebook, mapping = aes(x = dob_day)) +
  geom_histogram() +
  scale_x_continuous(breaks = round(seq(1, 31, 4)))

# One panel per month, default layout.
f.dob_day + facet_wrap(~dob_month)

# One panel per month in a 2 x 6 layout.
f.dob_day + facet_wrap(~dob_month, nrow = 2, ncol = 6)

# Grid: gender in rows, month in columns.
f.dob_day + facet_grid(gender ~ dob_month)

# Same grid with free axis scales.
f.dob_day + facet_grid(gender ~ dob_month, scales = "free")

# Free axis scales and free panel sizes.
f.dob_day + facet_grid(gender ~ dob_month, scales = "free", space = "free")

ggpairs

# ggpairs() comes from GGally.
library(GGally)
# Fix the seed so the random sample below is reproducible.
set.seed(1234)
# Keep only the columns to compare pairwise.
facebook_subset <- facebook[
  ,
  list(gender, tenure, friend_count, friendships_initiated, likes, likes_received)
]
# Pairs plot on a random sample of 1000 rows.
ggpairs(facebook_subset[sample.int(nrow(facebook_subset), 1000), ])

geom_tile

# Evaluate a damped radial wave on a square n-by-n grid.
#
# Args:
#   n: number of grid points along each axis.
#   r: half-width of the grid in multiples of pi (default 4), so each axis
#      runs from -r*pi to r*pi.
#
# Returns:
#   A data frame with n^2 rows and the columns x, y, r (distance from the
#   origin) and z = cos(r^2) * exp(-r / 6).
pp <- function(n, r = 4) {
  # length.out is spelled out; the previous `len = n` relied on partial matching.
  ticks <- seq(-r * pi, r * pi, length.out = n)
  grid <- expand.grid(x = ticks, y = ticks)
  grid$r <- sqrt(grid$x^2 + grid$y^2)
  grid$z <- cos(grid$r^2) * exp(-grid$r / 6)
  grid
}

# 200 x 200 grid of the surface as a data.table (40000 rows).
dt <- data.table(pp(200))
print(dt)
##                x         y        r            z
##     1: -12.56637 -12.56637 17.77153 -0.005023290
##     2: -12.44008 -12.56637 17.68245  0.004230445
##     3: -12.31378 -12.56637 17.59383 -0.005105300
##     4: -12.18749 -12.56637 17.50567  0.007714216
##     5: -12.06119 -12.56637 17.41798 -0.012093846
##    ---                                          
## 39996:  12.06119  12.56637 17.41798 -0.012093846
## 39997:  12.18749  12.56637 17.50567  0.007714216
## 39998:  12.31378  12.56637 17.59383 -0.005105300
## 39999:  12.44008  12.56637 17.68245  0.004230445
## 40000:  12.56637  12.56637 17.77153 -0.005023290
# One tile per grid point, filled by the surface value z.
ggplot(dt, aes(x = x, y = y, fill = z)) +
  geom_tile()

# Bin both coordinates into 99 equal-width intervals (100 break points).
# include.lowest = TRUE keeps the minimum coordinate in the first interval;
# without it cut() leaves the lowest break open and returns NA for it.
dt$xbind <- cut(dt$x, breaks = seq(min(dt$x), max(dt$x), length.out = 100), include.lowest = TRUE)
dt$ybind <- cut(dt$y, breaks = seq(min(dt$y), max(dt$y), length.out = 100), include.lowest = TRUE)
# Per-cell averages: mean coordinates and mean surface value.
dt.bind <- dt[, .(mean.x = mean(x), mean.y = mean(y), fill.z = mean(z)), by = .(xbind, ybind)]
# Plot the aggregated cells, reusing dt.bind rather than aggregating a second time.
ggplot(data = dt.bind) + geom_tile(aes(x = xbind, y = ybind, fill = fill.z))

# 10000 correlated points: two columns of standard normal draws multiplied by a
# 2 x 2 mixing matrix. The result has columns X1 and X2.
dt <- data.frame(
  matrix(rnorm(20000), ncol = 2) %*% matrix(c(2, 1, 3, 3), nrow = 2)
)
# Counts in 0.2 x 0.2 rectangular bins.
ggplot(dt, aes(x = X1, y = X2)) +
  geom_bin2d(binwidth = c(0.2, 0.2))

# Contour lines of the 2d density estimate.
ggplot(dt, aes(x = X1, y = X2)) +
  geom_density2d()

# Filled density bands; fill and transparency both follow the contour level.
ggplot(dt) +
  stat_density2d(
    mapping = aes(x = X1, y = X2, fill = ..level.., alpha = ..level..),
    geom = "polygon"
  )

# Binned counts with white density contours on top.
ggplot(dt) +
  geom_bin2d(mapping = aes(x = X1, y = X2), binwidth = c(0.2, 0.2)) +
  geom_density2d(mapping = aes(x = X1, y = X2), color = "white")

Maps

library(ggmap)
library(sp)
# Read the municipality outlines from a semicolon-separated file.
# TRUE is spelled out throughout: T is an ordinary variable that can be reassigned.
MunicipisCataluna <- fread("MunicipisCatalunya.csv", sep = ";", stringsAsFactors = TRUE)
# lon and lat use a decimal comma; convert them to numeric x and y columns.
MunicipisCataluna[, c("x", "y") := list(
  as.numeric(gsub(",", ".", lon, fixed = TRUE)),
  as.numeric(gsub(",", ".", lat, fixed = TRUE))
)]
# The original text columns are no longer needed.
MunicipisCataluna[, c("lat", "lon") := NULL]
# region = "<municipio> <compontnt>"; the two source columns are kept.
MunicipisCataluna <- unite(MunicipisCataluna, region, c(municipio, compontnt), sep = " ", remove = FALSE)

# Show the prepared outline table.
print(MunicipisCataluna)
##                  region     municipio codigoine compontnt order        x
##      1:       Guixers 1       Guixers     25111         1     1 1.520599
##      2:       Guixers 1       Guixers     25111         1     2 1.529528
##      3:       Guixers 1       Guixers     25111         1     3 1.531676
##      4:       Guixers 1       Guixers     25111         1     4 1.538425
##      5:       Guixers 1       Guixers     25111         1     5 1.545793
##     ---                                                                 
## 450103: Cervià de Ter 1 Cervià de Ter     17050         1   340 2.884106
## 450104: Cervià de Ter 1 Cervià de Ter     17050         1   341 2.884365
## 450105: Cervià de Ter 1 Cervià de Ter     17050         1   342 2.884485
## 450106: Cervià de Ter 1 Cervià de Ter     17050         1   343 2.884270
## 450107: Cervià de Ter 1 Cervià de Ter     17050         1   344 2.884730
##                y
##      1: 42.15181
##      2: 42.15180
##      3: 42.15206
##      4: 42.15184
##      5: 42.15217
##     ---         
## 450103: 42.06261
## 450104: 42.06280
## 450105: 42.06305
## 450106: 42.06490
## 450107: 42.06616
# Draw each region's outline from the same table used as plot data, filled by
# codigoine. expand_limits() makes the axes cover the outline coordinates, and
# the legend is hidden.
ggplot(data = MunicipisCataluna, mapping = aes(map_id = region)) +
  geom_map(
    mapping = aes(fill = codigoine),
    map = MunicipisCataluna,
    color = "black"
  ) +
  expand_limits(x = MunicipisCataluna$x, y = MunicipisCataluna$y) +
  theme(legend.position = "none")

# Background map centred on the mean of the outline vertices.
# NOTE(review): maptype "toner" is a Stamen style while source is "google" -
# confirm that the installed ggmap version accepts this combination.
background.map <- get_map(
  location = c(mean(MunicipisCataluna$x), mean(MunicipisCataluna$y)),
  source = "google",
  maptype = "toner",
  zoom = 8
)
# Polygon layer for the municipalities. Columns are referenced by name inside
# aes(); the layer already receives the table through `data`, so
# `MunicipisCataluna$...` is unnecessary there.
# NOTE(review): the 0.017 shift in y presumably aligns the outlines with the
# background map - confirm.
mon.shapes <- geom_polygon(
  data = MunicipisCataluna,
  aes(x = x, y = y - 0.017, group = region, fill = codigoine),
  alpha = 0.7,
  color = rgb(0, 0, 0, .3)
)

# Background map, outlines on top, axes widened to the outline coordinates,
# no legend.
ggmap(background.map) +
  mon.shapes +
  expand_limits(x = MunicipisCataluna$x, y = MunicipisCataluna$y) +
  theme(legend.position = "none")